library(tidyverse)
library(readxl)
library(janitor)
library(here)
library(assertr)
candy_2015 <- read_xlsx("raw_data/boing-boing-candy-2015.xlsx") %>%
clean_names()
candy_2016 <- read_xlsx("raw_data/boing-boing-candy-2016.xlsx") %>%
clean_names()
candy_2017 <- read_xlsx("raw_data/boing-boing-candy-2017.xlsx") %>%
clean_names()
New names:
candy_2015
candy_2016
candy_2017
NA
NA
names_2015 <- names(candy_2015)
names_2016 <- names(candy_2016)
names_2017 <- names(candy_2017)
Clean 2015 data
candy_cleaned_2015 <- candy_2015 %>%
rename(age = how_old_are_you,
going_out = are_you_going_actually_going_trick_or_treating_yourself) %>%
mutate(year = str_extract(timestamp, '[0-9]{1,4}'), .after = timestamp) %>%
mutate(id = row_number(timestamp) + 1e6) %>%
mutate(gender = NA_character_, .after = age) %>%
mutate(country = NA_character_, .after = age) %>%
select(id, year:york_peppermint_patties, necco_wafers) %>%
# replace all non integer age inputs as NA, convert values to integers
mutate(age = as.integer(age), year = as.integer(year)) %>%
pivot_longer(butterfinger:necco_wafers, names_to = "candy_name", values_to = "rating") %>%
clean_candy_names() %>%
select(id, year, going_out, age, gender, country, candy_name, rating)
Warning: NAs introduced by coercionWarning: NAs introduced by coercion to integer range
candy_cleaned_2015
NA
Clean 2016 data
candy_cleaned_2016 <- candy_2016 %>%
rename(going_out = are_you_going_actually_going_trick_or_treating_yourself,
age = how_old_are_you,
country = which_country_do_you_live_in,
gender = your_gender) %>%
mutate(id = row_number(timestamp) + 2e6, .before = timestamp) %>%
mutate(year = str_extract(timestamp, '[0-9]{1,4}'), .after = timestamp) %>%
clean_country_names() %>%
select(id, year:york_peppermint_patties, gender, -which_state_province_county_do_you_live_in) %>%
# replace all non integer age inputs as NA, convert values to integers
mutate(age = as.integer(age), year = as.integer(year)) %>%
pivot_longer(x100_grand_bar:york_peppermint_patties, names_to = "candy_name", values_to = "rating") %>%
clean_candy_names() %>%
select(id, year, going_out, age, gender, country, candy_name, rating)
Warning: NAs introduced by coercionWarning: NAs introduced by coercion to integer range
candy_cleaned_2016
NA
candy_cleaned_2017 <- candy_2017 %>%
rename(id = internal_id) %>%
pivot_longer(q1_going_out:q11_day, names_to = "col_names", values_to = "value") %>%
select(id, col_names, value) %>%
mutate(col_names = str_remove(col_names, "q[0-9]_")) %>%
pivot_wider(names_from = col_names, values_from = value) %>%
clean_names() %>%
mutate(year = as.integer(2017), .after = id) %>%
mutate(age = as.integer(age)) %>%
clean_country_names() %>%
pivot_longer(x100_grand_bar:york_peppermint_patties, names_to = "candy_name", values_to = "rating") %>%
clean_candy_names () %>%
select(id, year, going_out, age, gender, country, candy_name, rating) %>%
filter(!is.na(rating))
Warning: NAs introduced by coercion
candy_cleaned_2017
NA
Bind tables together
combined_data_long <- candy_cleaned_2015 %>%
bind_rows(candy_cleaned_2016) %>%
bind_rows(candy_cleaned_2017) %>%
#clean_candy_names() %>%
mutate(age = if_else((age > 0 & age < 100), age, NA_integer_)) %>%
filter(!is.na(rating))
combined_data_long
combined_data_wide <- combined_data_long %>%
pivot_wider(names_from = candy_name, values_from = rating)
combined_data_wide
NA
Analysis questions What is the total number of candy ratings given
across the three years. (Number of candy ratings, not the number of
raters. Don’t count missing values)
combined_data_long %>%
filter(!is.na(rating)) %>%
summarise(total_candy_ratings = n())
NA
What was the average age of people who are going out trick or
treating? # age column
combined_data_wide %>%
filter(going_out == "Yes") %>%
summarise(avg_age = mean(age, na.rm = TRUE))
NA
What was the average age of people who are not going trick or
treating? # age column
combined_data_wide %>%
filter(going_out == "No") %>%
summarise(avg_age = mean(age, na.rm = TRUE))
NA
NA
For each of joy, despair and meh, which candy bar received the most
of these ratings? # candy ratings
combined_data_long %>%
group_by(rating, candy_name) %>%
summarise(count = n()) %>%
slice_max(count)
`summarise()` has grouped output by 'rating'. You can override using the `.groups` argument.
How many people rated Starburst as despair? # starburst column (candy
columns)
combined_data_long %>%
filter(candy_name == "starburst", rating == "DESPAIR") %>%
summarise(count = n())
NA
NA
For the next three questions, count despair as -1, joy as +1, and meh
as 0.
What was the most popular candy bar by this rating system for each
gender in the dataset ? # candy ratings, gender
candy_scored <- combined_data_long %>%
mutate(score = case_when(
rating == "JOY" ~ 1,
rating == "DESPAIR" ~ -1,
TRUE ~ 0
))
candy_scored %>%
filter(!is.na(gender)) %>%
group_by(gender, candy_name) %>%
summarise(total_score = sum(score)) %>%
slice_max(total_score)
`summarise()` has grouped output by 'gender'. You can override using the `.groups` argument.
NA
NA
What was the most popular candy bar in each year? # date or year
candy_scored %>%
group_by(year, candy_name) %>%
summarise(total_score = sum(score)) %>%
slice_max(total_score)
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
What was the most popular candy bar by this rating for people in US,
Canada, UK, and all other countries?
candy_scored %>%
filter(country == c("USA", "Canada", "UK", "Other")) %>%
group_by(country, candy_name) %>%
summarise(total_score = sum(score)) %>%
slice_max(total_score)
`summarise()` has grouped output by 'country'. You can override using the `.groups` argument.
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpgYGB7cn0NCg0KbGlicmFyeSh0aWR5dmVyc2UpDQpsaWJyYXJ5KHJlYWR4bCkNCmxpYnJhcnkoamFuaXRvcikNCmxpYnJhcnkoaGVyZSkNCmxpYnJhcnkoYXNzZXJ0cikNCg0KYGBgDQoNCmBgYHtyfQ0KDQpjYW5keV8yMDE1IDwtIHJlYWRfeGxzeCgicmF3X2RhdGEvYm9pbmctYm9pbmctY2FuZHktMjAxNS54bHN4IikgJT4lDQogIGNsZWFuX25hbWVzKCkNCmNhbmR5XzIwMTYgPC0gcmVhZF94bHN4KCJyYXdfZGF0YS9ib2luZy1ib2luZy1jYW5keS0yMDE2Lnhsc3giKSAlPiUNCiAgY2xlYW5fbmFtZXMoKQ0KY2FuZHlfMjAxNyA8LSByZWFkX3hsc3goInJhd19kYXRhL2JvaW5nLWJvaW5nLWNhbmR5LTIwMTcueGxzeCIpICU+JQ0KICBjbGVhbl9uYW1lcygpDQoNCmBgYA0KDQpgYGB7cn0NCg0KY2FuZHlfMjAxNQ0KY2FuZHlfMjAxNg0KY2FuZHlfMjAxNw0KDQoNCmBgYA0KDQoNCg0KDQpgYGB7cn0NCg0KbmFtZXNfMjAxNSA8LSBuYW1lcyhjYW5keV8yMDE1KQ0KbmFtZXNfMjAxNiA8LSBuYW1lcyhjYW5keV8yMDE2KQ0KbmFtZXNfMjAxNyA8LSBuYW1lcyhjYW5keV8yMDE3KQ0KDQpgYGANCg0KIyMjIENsZWFuIDIwMTUgZGF0YQ0KDQpgYGB7cn0NCg0KY2FuZHlfY2xlYW5lZF8yMDE1IDwtIGNhbmR5XzIwMTUgJT4lIA0KICByZW5hbWUoYWdlID0gaG93X29sZF9hcmVfeW91LCANCiAgICAgICAgIGdvaW5nX291dCA9IGFyZV95b3VfZ29pbmdfYWN0dWFsbHlfZ29pbmdfdHJpY2tfb3JfdHJlYXRpbmdfeW91cnNlbGYpICU+JSANCiAgbXV0YXRlKHllYXIgPSBzdHJfZXh0cmFjdCh0aW1lc3RhbXAsICdbMC05XXsxLDR9JyksIC5hZnRlciA9IHRpbWVzdGFtcCkgJT4lDQogIG11dGF0ZShpZCA9IHJvd19udW1iZXIodGltZXN0YW1wKSArIDFlNikgJT4lIA0KICBtdXRhdGUoZ2VuZGVyID0gTkFfY2hhcmFjdGVyXywgLmFmdGVyID0gYWdlKSAlPiUgDQogIG11dGF0ZShjb3VudHJ5ID0gTkFfY2hhcmFjdGVyXywgLmFmdGVyID0gYWdlKSAlPiUgDQogIHNlbGVjdChpZCwgeWVhcjp5b3JrX3BlcHBlcm1pbnRfcGF0dGllcywgbmVjY29fd2FmZXJzKSAlPiUgDQogICMgcmVwbGFjZSBhbGwgbm9uIGludGVnZXIgYWdlIGlucHV0cyBhcyBOQSwgY29udmVydCB2YWx1ZXMgdG8gaW50ZWdlcnMNCiAgbXV0YXRlKGFnZSA9IGFzLmludGVnZXIoYWdlKSwgeWVhciA9IGFzLmludGVnZXIoeWVhcikpICU+JSANCiAgcGl2b3RfbG9uZ2VyKGJ1dHRlcmZpbmdlcjpuZWNjb193YWZlcnMsIG5hbWVzX3RvID0gImNhbmR5X25hbWUiLCB2YWx1ZXNfdG8gPSAicmF0aW5nIikgJT4lDQogIGNsZWFuX2NhbmR5X25hbWVzKCkgJT4lIA0KICBzZWxlY3QoaWQsIHllYXIsIGdvaW5nX291dCwgYWdlLCBnZW5kZXIsIGNvdW50cnksIGNhbmR5X25hbWUsIHJhdGluZykNCg0KY2FuZHlfY2xlYW5lZF8yMDE1DQoNCmBgYA0KDQojIyMgQ2xlYW4gMjAxNiBkYXRhDQoNCmBgYHtyfQ0KDQpjYW5keV9jbGVhbmVkXzIwMTYgPC0gY2FuZHlfMjAxNiAlPiUgDQogIHJlbmFtZShnb2luZ19vdXQgPSBhcmVfeW91X2dvaW5nX2FjdHVhbGx5X2dvaW5nX3RyaWNrX29yX3RyZWF0aW5nX3lvdXJzZWxmLA0KICAgICAgICAgYWdlID0gaG93X29sZF9hcmVfeW91LA0KICAgICAgICAgY291bnRyeSA9IHdoaWNoX2NvdW50cnlfZG9feW91X2xpdmVfaW4sDQogICAgICAgICBnZW5kZXIgPSB5b3VyX2dlbmRlcikgJT4lDQogIG11dGF0ZShpZCA9IHJvd19udW1iZXIodGltZXN0YW1wKSArIDJlNiwgLmJlZm9yZSA9IHRpbWVzdGFtcCkgJT4lIA0KICBtdXRhdGUoeWVhciA9IHN0cl9leHRyYWN0KHRpbWVzdGFtcCwgJ1swLTldezEsNH0nKSwgLmFmdGVyID0gdGltZXN0YW1wKSAlPiUNCiAgY2xlYW5fY291bnRyeV9uYW1lcygpICU+JSANCiAgc2VsZWN0KGlkLCB5ZWFyOnlvcmtfcGVwcGVybWludF9wYXR0aWVzLCBnZW5kZXIsIC13aGljaF9zdGF0ZV9wcm92aW5jZV9jb3VudHlfZG9feW91X2xpdmVfaW4pICU+JSANCiAgIyByZXBsYWNlIGFsbCBub24gaW50ZWdlciBhZ2UgaW5wdXRzIGFzIE5BLCBjb252ZXJ0IHZhbHVlcyB0byBpbnRlZ2Vycw0KICBtdXRhdGUoYWdlID0gYXMuaW50ZWdlcihhZ2UpLCB5ZWFyID0gYXMuaW50ZWdlcih5ZWFyKSkgJT4lDQogIHBpdm90X2xvbmdlcih4MTAwX2dyYW5kX2Jhcjp5b3JrX3BlcHBlcm1pbnRfcGF0dGllcywgbmFtZXNfdG8gPSAiY2FuZHlfbmFtZSIsIHZhbHVlc190byA9ICJyYXRpbmciKSAlPiUNCiAgY2xlYW5fY2FuZHlfbmFtZXMoKSAlPiUgDQogIHNlbGVjdChpZCwgeWVhciwgZ29pbmdfb3V0LCBhZ2UsIGdlbmRlciwgY291bnRyeSwgY2FuZHlfbmFtZSwgcmF0aW5nKQ0KDQoNCmNhbmR5X2NsZWFuZWRfMjAxNg0KDQpgYGANCg0KYGBge3J9DQoNCmNhbmR5X2NsZWFuZWRfMjAxNyA8LSBjYW5keV8yMDE3ICU+JSANCiAgcmVuYW1lKGlkID0gaW50ZXJuYWxfaWQpICU+JSANCiAgcGl2b3RfbG9uZ2VyKHExX2dvaW5nX291dDpxMTFfZGF5LCBuYW1lc190byA9ICJjb2xfbmFtZXMiLCB2YWx1ZXNfdG8gPSAidmFsdWUiKSAlPiUNCiAgc2VsZWN0KGlkLCBjb2xfbmFtZXMsIHZhbHVlKSAlPiUNCiAgbXV0YXRlKGNvbF9uYW1lcyA9IHN0cl9yZW1vdmUoY29sX25hbWVzLCAicVswLTldXyIpKSAlPiUNCiAgcGl2b3Rfd2lkZXIobmFtZXNfZnJvbSA9IGNvbF9uYW1lcywgdmFsdWVzX2Zyb20gPSB2YWx1ZSkgJT4lDQogIGNsZWFuX25hbWVzKCkgJT4lDQogIG11dGF0ZSh5ZWFyID0gYXMuaW50ZWdlcigyMDE3KSwgLmFmdGVyID0gaWQpICU+JQ0KICBtdXRhdGUoYWdlID0gYXMuaW50ZWdlcihhZ2UpKSAlPiUNCiAgY2xlYW5fY291bnRyeV9uYW1lcygpICU+JSANCiAgcGl2b3RfbG9uZ2VyKHgxMDBfZ3JhbmRfYmFyOnlvcmtfcGVwcGVybWludF9wYXR0aWVzLCBuYW1lc190byA9ICJjYW5keV9uYW1lIiwgdmFsdWVzX3RvID0gInJhdGluZyIpICU+JSANCiAgY2xlYW5fY2FuZHlfbmFtZXMgKCkgJT4lIA0KICBzZWxlY3QoaWQsIHllYXIsIGdvaW5nX291dCwgYWdlLCBnZW5kZXIsIGNvdW50cnksIGNhbmR5X25hbWUsIHJhdGluZykgJT4lIA0KICBmaWx0ZXIoIWlzLm5hKHJhdGluZykpDQoNCmNhbmR5X2NsZWFuZWRfMjAxNw0KDQpgYGANCg0KIyBCaW5kIHRhYmxlcyB0b2dldGhlcg0KDQpgYGB7cn0NCg0KY29tYmluZWRfZGF0YV9sb25nIDwtIGNhbmR5X2NsZWFuZWRfMjAxNSAlPiUgDQogIGJpbmRfcm93cyhjYW5keV9jbGVhbmVkXzIwMTYpICU+JSANCiAgYmluZF9yb3dzKGNhbmR5X2NsZWFuZWRfMjAxNykgJT4lIA0KICAjY2xlYW5fY2FuZHlfbmFtZXMoKSAlPiUgDQogIG11dGF0ZShhZ2UgPSBpZl9lbHNlKChhZ2UgPiAwICYgYWdlIDwgMTAwKSwgYWdlLCBOQV9pbnRlZ2VyXykpICU+JSANCiAgZmlsdGVyKCFpcy5uYShyYXRpbmcpKQ0KDQpjb21iaW5lZF9kYXRhX2xvbmcNCg0KY29tYmluZWRfZGF0YV93aWRlIDwtIGNvbWJpbmVkX2RhdGFfbG9uZyAlPiUNCiAgcGl2b3Rfd2lkZXIobmFtZXNfZnJvbSA9IGNhbmR5X25hbWUsIHZhbHVlc19mcm9tID0gcmF0aW5nKQ0KY29tYmluZWRfZGF0YV93aWRlDQoNCmBgYA0KDQoNCi0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tDQoNCkFuYWx5c2lzIHF1ZXN0aW9ucw0KV2hhdCBpcyB0aGUgdG90YWwgbnVtYmVyIG9mIGNhbmR5IHJhdGluZ3MgZ2l2ZW4gYWNyb3NzIHRoZSB0aHJlZSB5ZWFycy4gKE51bWJlciBvZiBjYW5keSByYXRpbmdzLCBub3QgdGhlIG51bWJlciBvZiByYXRlcnMuIERvbuKAmXQgY291bnQgbWlzc2luZyB2YWx1ZXMpDQoNCmBgYHtyfQ0KDQpjb21iaW5lZF9kYXRhX2xvbmcgJT4lDQogIGZpbHRlcighaXMubmEocmF0aW5nKSkgJT4lIA0KICBzdW1tYXJpc2UodG90YWxfY2FuZHlfcmF0aW5ncyA9IG4oKSkNCg0KYGBgDQoNCldoYXQgd2FzIHRoZSBhdmVyYWdlIGFnZSBvZiBwZW9wbGUgd2hvIGFyZSBnb2luZyBvdXQgdHJpY2sgb3IgdHJlYXRpbmc/DQojIGFnZSBjb2x1bW4NCg0KYGBge3J9DQoNCmNvbWJpbmVkX2RhdGFfd2lkZSAlPiUgDQogIGZpbHRlcihnb2luZ19vdXQgPT0gIlllcyIpICU+JSANCiAgc3VtbWFyaXNlKGF2Z19hZ2UgPSBtZWFuKGFnZSwgbmEucm0gPSBUUlVFKSkNCg0KYGBgDQoNCg0KV2hhdCB3YXMgdGhlIGF2ZXJhZ2UgYWdlIG9mIHBlb3BsZSB3aG8gYXJlIG5vdCBnb2luZyB0cmljayBvciB0cmVhdGluZz8NCiMgYWdlIGNvbHVtbg0KDQpgYGB7cn0NCg0KY29tYmluZWRfZGF0YV93aWRlICU+JSANCiAgZmlsdGVyKGdvaW5nX291dCA9PSAiTm8iKSAlPiUgDQogIHN1bW1hcmlzZShhdmdfYWdlID0gbWVhbihhZ2UsIG5hLnJtID0gVFJVRSkpDQoNCg0KYGBgDQoNCg0KRm9yIGVhY2ggb2Ygam95LCBkZXNwYWlyIGFuZCBtZWgsIHdoaWNoIGNhbmR5IGJhciByZWNlaXZlZCB0aGUgbW9zdCBvZiB0aGVzZSByYXRpbmdzPw0KIyBjYW5keSByYXRpbmdzDQoNCmBgYHtyfQ0KDQpjb21iaW5lZF9kYXRhX2xvbmcgJT4lDQogIGdyb3VwX2J5KHJhdGluZywgY2FuZHlfbmFtZSkgJT4lIA0KICBzdW1tYXJpc2UoY291bnQgPSBuKCkpICU+JSANCiAgc2xpY2VfbWF4KGNvdW50KSANCg0KYGBgDQoNCg0KDQpIb3cgbWFueSBwZW9wbGUgcmF0ZWQgU3RhcmJ1cnN0IGFzIGRlc3BhaXI/DQojIHN0YXJidXJzdCBjb2x1bW4gKGNhbmR5IGNvbHVtbnMpDQoNCmBgYHtyfQ0KDQpjb21iaW5lZF9kYXRhX2xvbmcgJT4lIA0KICBmaWx0ZXIoY2FuZHlfbmFtZSA9PSAic3RhcmJ1cnN0IiwgcmF0aW5nID09ICJERVNQQUlSIikgJT4lIA0KICBzdW1tYXJpc2UoY291bnQgPSBuKCkpDQogIA0KICANCmBgYA0KDQoNCkZvciB0aGUgbmV4dCB0aHJlZSBxdWVzdGlvbnMsIGNvdW50IGRlc3BhaXIgYXMgLTEsIGpveSBhcyArMSwgYW5kIG1laCBhcyAwLg0KDQoNCg0KV2hhdCB3YXMgdGhlIG1vc3QgcG9wdWxhciBjYW5keSBiYXIgYnkgdGhpcyByYXRpbmcgc3lzdGVtIGZvciBlYWNoIGdlbmRlciBpbiB0aGUgZGF0YXNldCA/DQojIGNhbmR5IHJhdGluZ3MsIGdlbmRlcg0KDQpgYGB7cn0NCg0KY2FuZHlfc2NvcmVkIDwtIGNvbWJpbmVkX2RhdGFfbG9uZyAlPiUgDQogIG11dGF0ZShzY29yZSA9IGNhc2Vfd2hlbigNCiAgICByYXRpbmcgPT0gIkpPWSIgfiAxLA0KICAgIHJhdGluZyA9PSAiREVTUEFJUiIgfiAtMSwNCiAgICBUUlVFIH4gMA0KICApKQ0KDQpgYGANCg0KYGBge3J9DQoNCmNhbmR5X3Njb3JlZCAlPiUNCiAgZmlsdGVyKCFpcy5uYShnZW5kZXIpKSAlPiUgDQogIGdyb3VwX2J5KGdlbmRlciwgY2FuZHlfbmFtZSkgJT4lDQogIHN1bW1hcmlzZSh0b3RhbF9zY29yZSA9IHN1bShzY29yZSkpICU+JSANCiAgc2xpY2VfbWF4KHRvdGFsX3Njb3JlKQ0KICANCg0KYGBgDQoNCg0KV2hhdCB3YXMgdGhlIG1vc3QgcG9wdWxhciBjYW5keSBiYXIgaW4gZWFjaCB5ZWFyPw0KIyBkYXRlIG9yIHllYXINCg0KYGBge3J9DQoNCmNhbmR5X3Njb3JlZCAlPiUgDQogIGdyb3VwX2J5KHllYXIsIGNhbmR5X25hbWUpICU+JSANCiAgc3VtbWFyaXNlKHRvdGFsX3Njb3JlID0gc3VtKHNjb3JlKSkgJT4lIA0KICBzbGljZV9tYXgodG90YWxfc2NvcmUpDQoNCg0KYGBgDQoNCg0KV2hhdCB3YXMgdGhlIG1vc3QgcG9wdWxhciBjYW5keSBiYXIgYnkgdGhpcyByYXRpbmcgZm9yIHBlb3BsZSBpbiBVUywgQ2FuYWRhLCBVSywNCmFuZCBhbGwgb3RoZXIgY291bnRyaWVzPw0KDQpgYGB7cn0NCg0KY2FuZHlfc2NvcmVkICU+JQ0KICBmaWx0ZXIoY291bnRyeSA9PSBjKCJVU0EiLCAiQ2FuYWRhIiwgIlVLIiwgIk90aGVyIikpICU+JSANCiAgZ3JvdXBfYnkoY291bnRyeSwgY2FuZHlfbmFtZSkgJT4lIA0KICBzdW1tYXJpc2UodG90YWxfc2NvcmUgPSBzdW0oc2NvcmUpKSAlPiUgDQogIHNsaWNlX21heCh0b3RhbF9zY29yZSkNCg0KYGBgDQoNCg==